########################################################
# This code trains the baseline 2006 model with alternative hyperparameter 
# tuning settings.
########################################################

########################################################
## Preparation of the workspace
########################################################

## start from a clean workspace: drop every object that is currently defined
rm(list = ls())

## load the required packages
library(haven)
library(caret)
library(randomForest)
library(doParallel)
library(mice)
library(plyr)
library(dplyr)
library(VIM)
library(base)
library(ranger)
library(glmnet)
library(xgboost)

## record the start time -> allows checking how long the script takes to run
start_time <- Sys.time()

## project root; the data and program folders are derived from it
main <- "placeholder_main"

RDirectory <- paste0(main, "/Programs/Output Generation")
dataDirectory <- paste0(main, "/Data")

## make tuning() and estimating() available: they are expected to be defined
## in the two helper scripts below
source(
  paste0(RDirectory, "/102_0_caret_parameter_tuning_Function.R")
)
source(
  paste0(RDirectory, "/103_0_caret_predictions_Function.R")
)


########################################################
## Define the locals for the loop
########################################################
 
# Model(s) to run; the model name is part of the name of the .dta file that
# holds the data
model_list <- c("Full")

# Outcome variable(s) to predict
dependent_variable <- "emplAft6M_0M_In"

# Year(s) to run
years <- c(2006)

# Sample splits to run. In the loop below the first element of each pair is
# passed on as s_tuning and the second as s_training.
splits <- list(
  c(0.1, 0.2),
  c(0.2, 0.2),
  c(0.1, 0.4)
)


########################################################
## Run the loop
########################################################   

## For every model, year and tuning/training split:
##   1. tune the hyperparameters on the baseline outcome "emplAft6M_0M_In"
##      and write the three final tuning grids to .csv files,
##   2. estimate the models with estimating(),
##   3. save the fitted models (.rda) and the predictions (.csv).
## Outcomes other than the baseline reuse the parameters tuned on the baseline
## outcome within the same split.
for (model in model_list) {

  for (y in years) {

    ## Load the dataset once per model and year. The file name depends only on
    ## `model` and `y`, so there is no need to re-read it for every split.
    data <- read_dta(paste0(dataDirectory, "/002_DataForR_", model, "_", y, ".dta"))

    for (split in splits) {

      ## split[1] is passed on as s_tuning, split[2] as s_training.
      ## Tuned parameters belong to one split only: reset them here so that the
      ## else-branch below can never pick up values left over from another
      ## split.
      parameters <- NULL

      for (dependent in dependent_variable) {

        ## Part of the file name that all outputs of this iteration share
        file_tag <- paste0(model, "_ML_Robustness_",
                           100 * split[1], 100 * split[2],
                           "Split_", dependent, "_", y)

        if (dependent == "emplAft6M_0M_In") {

          ## Run the tuning function:
          parameters <- tuning(data = data, dependent = dependent,
                               s_tuning = split[1], seed = 2111)

          ## Store the final tuning grids returned by tuning()
          write.csv(parameters$rfgrid_final,
                    file = paste0(dataDirectory, "/102_rfgrid_", file_tag, ".csv"),
                    row.names = FALSE)
          write.csv(parameters$boostgrid_final,
                    file = paste0(dataDirectory, "/102_boostgrid_", file_tag, ".csv"),
                    row.names = FALSE)
          write.csv(parameters$lassogrid_final,
                    file = paste0(dataDirectory, "/102_lassogrid_", file_tag, ".csv"),
                    row.names = FALSE)

          ########################################################
          ## Run the prediction function
          ########################################################
          results <- estimating(data = data, dependent = dependent,
                                parameters = parameters,
                                s_tuning = split[1], s_training = split[2], seed = 2111)

        } else {

          ## No tuning is run for this outcome: s_tuning is set to 0 and the
          ## sum of both shares is passed as s_training. The parameters must
          ## therefore already have been tuned on the baseline outcome.
          if (is.null(parameters)) {
            stop("No tuned parameters available for '", dependent,
                 "': 'emplAft6M_0M_In' must come before it in dependent_variable.",
                 call. = FALSE)
          }

          results <- estimating(data = data, dependent = dependent,
                                parameters = parameters,
                                s_tuning = 0, s_training = sum(split), seed = 2111)
        }

        ########################################################
        ## Save models
        ########################################################
        ## save() stores the object under its variable name, so the name
        ## `models` is kept for whoever load()s the .rda file later.
        models <- results$models
        save(models,
             file = paste0(dataDirectory, "/103_Models_", file_tag, ".rda"))

        ########################################################
        ## Save predictions
        ########################################################
        write.csv(results$output,
                  file = paste0(dataDirectory, "/103_predictionsR_", file_tag, ".csv"))

      }
    }
  }
}

